library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0 v purrr 0.3.3
## v tibble 3.0.0 v dplyr 0.8.5
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## Warning: package 'ggplot2' was built under R version 3.6.3
## Warning: package 'tibble' was built under R version 3.6.3
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'readr' was built under R version 3.6.3
## Warning: package 'purrr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## Warning: package 'stringr' was built under R version 3.6.3
## Warning: package 'forcats' was built under R version 3.6.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(maps)
## Warning: package 'maps' was built under R version 3.6.3
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(mapdata)
## Warning: package 'mapdata' was built under R version 3.6.3
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.3
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(viridis)
## Warning: package 'viridis' was built under R version 3.6.3
## Loading required package: viridisLite
## Warning: package 'viridisLite' was built under R version 3.6.3
library(wesanderson)
## Warning: package 'wesanderson' was built under R version 3.6.3
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
rename(Long = "Long_")
## Parsed with column specification:
## cols(
## FIPS = col_character(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed/1000)) +
borders("world", colour = NA, fill = "grey90") +
theme_bw() +
geom_point(shape = 21, color='purple', fill='purple', alpha = 0.5) +
labs(title = 'World COVID-19 Confirmed cases',x = '', y = '',
size="Cases (x1000))") +
theme(legend.position = "right") +
coord_fixed(ratio=1.5)
## Warning: Removed 54 rows containing missing values (geom_point).

daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-05-2020.csv")) %>%
rename(Long = "Long_") %>%
filter(Country_Region == "US") %>%
filter (!Province_State %in% c("Alaska","Hawaii", "American Samoa",
"Puerto Rico","Northern Mariana Islands",
"Virgin Islands", "Recovered", "Guam", "Grand Princess",
"District of Columbia", "Diamond Princess")) %>%
filter(Lat > 0)
## Parsed with column specification:
## cols(
## FIPS = col_character(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed/1000)) +
borders("state", colour = "black", fill = "grey90") +
theme_bw() +
geom_point(shape = 21, color='purple', fill='purple', alpha = 0.5) +
labs(title = 'COVID-19 Confirmed Cases in the US', x = '', y = '',
size="Cases (x1000))") +
theme(legend.position = "right") +
coord_fixed(ratio=1.5)

mybreaks <- c(1, 100, 1000, 10000, 10000)
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed)) +
borders("state", colour = "white", fill = "grey90") +
geom_point(aes(x=Long, y=Lat, size=Confirmed, color=Confirmed),stroke=F, alpha=0.7) +
scale_size_continuous(name="Cases", trans="log", range=c(1,7),
breaks=mybreaks, labels = c("1-99",
"100-999", "1,000-9,999", "10,000-99,999", "50,000+")) +
scale_color_viridis_c(option="viridis",name="Cases",
trans="log", breaks=mybreaks, labels = c("1-99",
"100-999", "1,000-9,999", "10,000-99,999", "50,000+")) +
# Cleaning up the graph
theme_void() +
guides( colour = guide_legend()) +
labs(title = "COVID-19 Confirmed Cases in the US'") +
theme(
legend.position = "bottom",
text = element_text(color = "#22211d"),
plot.background = element_rect(fill = "#ffffff", color = NA),
panel.background = element_rect(fill = "#ffffff", color = NA),
legend.background = element_rect(fill = "#ffffff", color = NA)
) +
coord_fixed(ratio=1.5)
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 40 rows containing missing values (geom_point).

daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
rename(Long = "Long_") %>%
filter(Country_Region == "US") %>%
group_by(Province_State) %>%
summarize(Confirmed = sum(Confirmed)) %>%
mutate(Province_State = tolower(Province_State))
## Parsed with column specification:
## cols(
## FIPS = col_character(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
us <- map_data("state")
state_join <- left_join(us, daily_report, by = c("region" = "Province_State"))
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
coord_fixed(1.3) + geom_polygon(data = state_join, aes(fill = Confirmed), color = "black") +
scale_fill_gradientn(colours =
wes_palette("Zissou1", 100, type = "continuous"),
trans = "log10") +
labs(title = "COVID-19 Confirmed Cases in the US'")

library(RColorBrewer)
report_03_27_2020 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
rename(Long = "Long_") %>%
unite(Key, Admin2, Province_State, sep = ".") %>%
group_by(Key) %>%
summarize(Confirmed = sum(Confirmed)) %>%
mutate(Key = tolower(Key))
## Parsed with column specification:
## cols(
## FIPS = col_character(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
us <- map_data("state")
counties <- map_data("county") %>%
unite(Key, subregion, region, sep = ".", remove = FALSE)
state_join <- left_join(counties, report_03_27_2020, by = c("Key"))
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
coord_fixed(1.3) +
borders("state", colour = "black") +
geom_polygon(data = state_join, aes(fill = Confirmed)) +
scale_fill_gradientn(colors = brewer.pal(n = 5, name = "PuRd"),
breaks = c(1, 10, 100, 1000, 10000, 100000),
trans = "log10", na.value = "White") +
ggtitle("Number of Confirmed Cases by US County") +
theme_bw()
## Warning: Transformation introduced infinite values in discrete y-axis

daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")) %>%
rename(Long = "Long_") %>%
filter(Province_State == "Massachusetts") %>%
group_by(Admin2) %>%
summarize(Confirmed = sum(Confirmed)) %>%
mutate(Admin2 = tolower(Admin2))
## Parsed with column specification:
## cols(
## FIPS = col_character(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )
us <- map_data("state")
ma_us <- subset(us, region == "massachusetts")
counties <- map_data("county")
ma_county <- subset(counties, region == "massachusetts")
state_join <- left_join(ma_county, daily_report, by = c("subregion" = "Admin2"))
ggplot(data = ma_county, mapping = aes(x = long, y = lat, group = group)) + coord_fixed(1.3) +
geom_polygon(data = state_join, aes(fill = Confirmed), color = "white") +
scale_fill_gradientn(colors = brewer.pal(n = 5, name = "BuGn"),
trans = "log10") +
labs(title = "COVID-19 Confirmed Cases in Massachusetts'")

daily_report
## # A tibble: 14 x 2
## Admin2 Confirmed
## <chr> <dbl>
## 1 barnstable 283
## 2 berkshire 213
## 3 bristol 424
## 4 dukes and nantucket 12
## 5 essex 1039
## 6 franklin 85
## 7 hampden 546
## 8 hampshire 102
## 9 middlesex 1870
## 10 norfolk 938
## 11 plymouth 621
## 12 suffolk 1896
## 13 unassigned 270
## 14 worcester 667
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
ggplotly(
ggplot(data = ma_county, mapping = aes(x = long, y = lat, group = group)) + coord_fixed(1.3)
+ geom_polygon(data = state_join, aes(fill = Confirmed), color = "black")
+ scale_fill_gradientn(colours = wes_palette("Zissou1", 100, type = "continuous"))
+ ggtitle("COVID-19 Cases in MA") +
labs(x=NULL, y=NULL) +
theme(panel.border = element_blank()) +
theme(panel.background = element_blank()) +
theme(axis.ticks = element_blank()) +
theme(axis.text = element_blank()))
time_series_confirmed_long <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")) %>% rename(Province_State = "Province/State", Country_Region = "Country/Region") %>% pivot_longer(-c(Province_State, Country_Region, Lat, Long), names_to = "Date", values_to = "Confirmed")
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
time_series_deaths_long <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv")) %>% rename(Province_State = "Province/State", Country_Region = "Country/Region") %>% pivot_longer(-c(Province_State, Country_Region, Lat, Long), names_to = "Date", values_to = "Deaths")
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
time_series_recovered_long <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv")) %>% rename(Province_State = "Province/State", Country_Region = "Country/Region") %>% pivot_longer(-c(Province_State, Country_Region, Lat, Long), names_to = "Date", values_to = "Recovered")
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
time_series_confirmed_long <- time_series_confirmed_long %>% unite(Key, Province_State, Country_Region, Date, sep = ".", remove = FALSE)
time_series_deaths_long <- time_series_deaths_long %>% unite(Key, Province_State, Country_Region, Date, sep = ".") %>% select(Key, Deaths)
time_series_recovered_long <- time_series_recovered_long %>% unite(Key, Province_State, Country_Region, Date, sep = ".") %>% select(Key, Recovered)
time_series_long_joined <- full_join(time_series_confirmed_long, time_series_deaths_long, by = c("Key"))
time_series_long_joined <- full_join(time_series_long_joined, time_series_recovered_long, by = c("Key")) %>%select(-Key)
time_series_long_joined$Date <- mdy(time_series_long_joined$Date)
time_series_long_joined_counts <- time_series_long_joined %>% pivot_longer(-c(Province_State, Country_Region, Lat, Long, Date), names_to = "Report_Type", values_to = "Counts")
library(ggplot2)
library(gganimate)
## Warning: package 'gganimate' was built under R version 3.6.3
library(transformr)
## Warning: package 'transformr' was built under R version 3.6.3
theme_set(theme_bw())
data_time <- time_series_long_joined %>%
group_by(Country_Region,Date) %>%
summarise_at(c("Confirmed", "Deaths", "Recovered"), sum) %>%
filter (Country_Region %in% c("China","Korea, South","Japan","Italy","US"))
p <- ggplot(data_time, aes(x = Date, y = Confirmed, color = Country_Region)) +
geom_point() +
geom_line() +
ggtitle("Confirmed COVID-19 Cases") +
geom_point(aes(group = seq_along(Date))) +
transition_reveal(Date)
animate(p, renderer = gifski_renderer(), end_pause = 15)

covid <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")) %>%
rename(Province_State= "Province/State", Country_Region = "Country/Region") %>%
pivot_longer(-c(Province_State, Country_Region, Lat, Long),
names_to = "Date", values_to = "Confirmed") %>%
mutate(Date = mdy(Date) - days(1),
Place = paste(Lat,Long,sep="_")) %>%
group_by(Place,Date) %>%
summarise(cumulative_cases = ifelse(sum(Confirmed)>0,
sum(Confirmed),NA_real_),
Lat = mean(Lat),
Long = mean(Long)) %>%
mutate(Pandemic_day = as.numeric(Date - min(Date)))
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
world <- ggplot(covid,aes(x = Long, y = Lat, size = cumulative_cases/1000)) +
borders("world", colour = "gray50", fill = "grey90") +
theme_bw() +
geom_point(color='purple', alpha = .5) +
labs(title = 'Pandemic Day: {frame}',x = '', y = '',
size="Cases (x1000))") +
theme(legend.position = "right") +
coord_fixed(ratio=1.3)+
transition_time(Date) +
enter_fade()
animate(world, renderer = gifski_renderer(), end_pause = 30)
## Warning: Removed 229 rows containing missing values (geom_point).
## Warning: Removed 229 rows containing missing values (geom_point).
## Warning: Removed 221 rows containing missing values (geom_point).
## Warning: Removed 219 rows containing missing values (geom_point).
## Warning: Removed 216 rows containing missing values (geom_point).
## Warning: Removed 213 rows containing missing values (geom_point).
## Warning: Removed 419 rows containing missing values (geom_point).
## Warning: Removed 206 rows containing missing values (geom_point).
## Warning: Removed 203 rows containing missing values (geom_point).
## Warning: Removed 199 rows containing missing values (geom_point).
## Warning: Removed 197 rows containing missing values (geom_point).
## Warning: Removed 197 rows containing missing values (geom_point).
## Warning: Removed 197 rows containing missing values (geom_point).
## Warning: Removed 196 rows containing missing values (geom_point).
## Warning: Removed 196 rows containing missing values (geom_point).
## Warning: Removed 196 rows containing missing values (geom_point).
## Warning: Removed 390 rows containing missing values (geom_point).
## Warning: Removed 195 rows containing missing values (geom_point).
## Warning: Removed 195 rows containing missing values (geom_point).
## Warning: Removed 195 rows containing missing values (geom_point).
## Warning: Removed 195 rows containing missing values (geom_point).
## Warning: Removed 195 rows containing missing values (geom_point).
## Warning: Removed 194 rows containing missing values (geom_point).
## Warning: Removed 194 rows containing missing values (geom_point).
## Warning: Removed 194 rows containing missing values (geom_point).
## Warning: Removed 194 rows containing missing values (geom_point).
## Warning: Removed 387 rows containing missing values (geom_point).
## Warning: Removed 193 rows containing missing values (geom_point).
## Warning: Removed 191 rows containing missing values (geom_point).
## Warning: Removed 191 rows containing missing values (geom_point).
## Warning: Removed 191 rows containing missing values (geom_point).
## Warning: Removed 186 rows containing missing values (geom_point).
## Warning: Removed 182 rows containing missing values (geom_point).
## Warning: Removed 175 rows containing missing values (geom_point).
## Warning: Removed 171 rows containing missing values (geom_point).
## Warning: Removed 164 rows containing missing values (geom_point).
## Warning: Removed 313 rows containing missing values (geom_point).
## Warning: Removed 146 rows containing missing values (geom_point).
## Warning: Removed 142 rows containing missing values (geom_point).
## Warning: Removed 134 rows containing missing values (geom_point).
## Warning: Removed 130 rows containing missing values (geom_point).
## Warning: Removed 121 rows containing missing values (geom_point).
## Warning: Removed 118 rows containing missing values (geom_point).
## Warning: Removed 113 rows containing missing values (geom_point).
## Warning: Removed 109 rows containing missing values (geom_point).
## Warning: Removed 200 rows containing missing values (geom_point).
## Warning: Removed 94 rows containing missing values (geom_point).
## Warning: Removed 80 rows containing missing values (geom_point).
## Warning: Removed 65 rows containing missing values (geom_point).
## Warning: Removed 59 rows containing missing values (geom_point).
## Warning: Removed 52 rows containing missing values (geom_point).
## Warning: Removed 49 rows containing missing values (geom_point).
## Warning: Removed 44 rows containing missing values (geom_point).
## Warning: Removed 38 rows containing missing values (geom_point).
## Warning: Removed 29 rows containing missing values (geom_point).
## Warning: Removed 49 rows containing missing values (geom_point).
## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 20 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing missing values (geom_point).
## Warning: Removed 14 rows containing missing values (geom_point).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 7 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).

Exercises


## Parsed with column specification:
## cols(
## FIPS = col_character(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )

## Parsed with column specification:
## cols(
## FIPS = col_character(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character()
## )